In [ ]:
!pip install numpy pandas plotly pywt pyts scipy sklearn statsmodels
!pip install ruptures

I. Import dependencies¶

In [354]:
# All third-party dependencies for the notebook, in one cell.
# (The original cell imported plotly.graph_objs as go twice; deduplicated.)
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as pyo
from plotly.offline import iplot
import pywt
from pyts.decomposition import SingularSpectrumAnalysis
from scipy.fft import fft, fftfreq
from scipy.ndimage import gaussian_filter1d
from scipy.signal import butter, lfilter, freqz, welch
from sklearn.ensemble import IsolationForest
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose, STL
from statsmodels.tsa.stattools import acf

# Render plotly figures inline without contacting the plotly CDN.
pyo.init_notebook_mode(connected=True)

II. Clean dataset¶

In [355]:
# Load the drawdown dataset; replace 'test.csv' with the actual path to your CSV file.
df = pd.read_csv('test.csv')

# Display the frame (rich notebook repr). From the output below: columns are
# 'date', 'drawdowns', 'drawdowns MA', 'Local Min'; the trailing rows have NaN dates.
df
Out[355]:
date drawdowns drawdowns MA Local Min
0 2010-01-01 0.000000 0.000000 0
1 2010-01-04 0.000000 0.000000 0
2 2010-01-05 0.000000 0.000000 0
3 2010-01-06 0.000000 0.000000 0
4 2010-01-07 0.000000 0.000000 0
... ... ... ... ...
3572 NaN -0.000313 -0.000298 0
3573 NaN -0.000313 -0.000298 0
3574 NaN -0.000313 -0.000299 0
3575 NaN -0.000313 -0.000299 0
3576 NaN -0.000313 -0.000300 0

3577 rows × 4 columns

In [356]:
# Drop the trailing rows whose 'date' is NaN (visible in the raw dump above).
# Reassigning (instead of inplace=True) keeps the cell idempotent on re-run.
df = df.dropna()
df
Out[356]:
date drawdowns drawdowns MA Local Min
0 2010-01-01 0.000000 0.000000 0
1 2010-01-04 0.000000 0.000000 0
2 2010-01-05 0.000000 0.000000 0
3 2010-01-06 0.000000 0.000000 0
4 2010-01-07 0.000000 0.000000 0
... ... ... ... ...
3556 2023-08-21 -0.000302 -0.000289 0
3557 2023-08-22 -0.000299 -0.000289 0
3558 2023-08-23 -0.000291 -0.000290 0
3559 2023-08-24 -0.000293 -0.000291 0
3560 2023-08-25 -0.000301 -0.000291 0

3561 rows × 4 columns

II. Plot dataset¶

In [357]:
import pandas as pd
import plotly.graph_objs as go

# Ensure 'date' is a datetime dtype so plotly renders a proper time axis.
df['date'] = pd.to_datetime(df['date'])

# Build the figure incrementally instead of constructing trace/layout objects up front.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['date'], y=df['drawdowns'], mode='lines', name='Drawdowns'))
fig.add_trace(go.Scatter(x=df['date'], y=df['drawdowns MA'], mode='lines', name='Drawdowns MA'))

fig.update_layout(
    title='Drawdowns and Drawdowns Moving Average Over Time',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Value'),
    legend=dict(y=1, x=1),
)

# Render inline.
fig.show()
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
DrawdownsDrawdowns MADrawdowns and Drawdowns Moving Average Over TimeDateValue
plotly-logomark
In [358]:
# Columns whose sample autocorrelation we want to compare.
columns_to_acf = ['drawdowns', 'drawdowns MA']

# One scatter trace per column: x = lag, y = ACF value at that lag.
traces = []
for column in columns_to_acf:
    acf_vals = acf(df[column], nlags=40, fft=True)
    traces.append(go.Scatter(
        x=list(range(len(acf_vals))),
        y=acf_vals,
        mode='lines+markers',
        name=f'Autocorrelation {column}',
    ))

# Assemble figure with the layout defined inline.
fig = go.Figure(
    data=traces,
    layout=go.Layout(
        title='Autocorrelation of Drawdowns and Drawdowns MA',
        xaxis=dict(title='Lag'),
        yaxis=dict(title='Autocorrelation'),
        hovermode='closest',
    ),
)

fig.show()
05101520253035400.70.750.80.850.90.951
Autocorrelation drawdownsAutocorrelation drawdowns MAAutocorrelation of Drawdowns and Drawdowns MALagAutocorrelation
plotly-logomark

Autocorrelation of Drawdowns (blue line): This is similar to the first plot you shared. It starts at 1 at lag 0 and decreases as the lag increases, indicating that the correlation between drawdowns decreases the further apart they are in time.

Autocorrelation of Drawdowns MA (red line): The autocorrelation of the moving average of drawdowns also starts at 1 at lag 0, but it decreases at a slower rate compared to the raw drawdowns. This suggests that the moving average has a more consistent pattern over time, or that it "smooths out" the volatility in the drawdowns.

The moving average seems to retain higher autocorrelation at higher lags compared to the raw drawdowns. This indicates:

  • The moving average is less volatile and has a longer "memory" of past values, which could be due to the smoothing effect that averages out the extreme values (peaks and troughs).
  • Investment strategies based on moving averages might exhibit less erratic behavior and could potentially be more predictable over time compared to strategies that react to every drawdown.
  • For risk management, this could imply that a strategy that takes into account the moving average of drawdowns might respond less to short-term fluctuations and more to established trends.
  • Overall, the presence of higher autocorrelation in the moving average suggests that it may be a better indicator for understanding long-term trends in drawdown behavior. Investors or analysts may prefer to use moving averages to assess the risk and develop strategies, as it could offer a more stable and less noisy signal.

III. Unit root, stationary, random walk and drift testing¶

Null Hypothesis (H_0): The series has a unit root (is a random walk), possibly with drift.

Alternative Hypothesis (H_1): The series has no unit root (is not a random walk) and is stationary around a deterministic trend (if testing for drift).

In [359]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Run Augmented Dickey-Fuller unit-root tests on both series under four
# deterministic-term specifications, printing statistic, p-value, critical
# values, and a plain-language interpretation for each combination.

# Columns to test and the deterministic-regression specifications to try.
columns_to_test = ['drawdowns', 'drawdowns MA']
regression_models = ['c', 'ct', 'ctt', 'n']  # 'c' constant (drift), 'ct' constant+trend, 'ctt' constant+linear+quadratic trend, 'n' no constant

# Loop over each column and regression model
for column in columns_to_test:
    for regression in regression_models:
        result = adfuller(df[column], regression=regression)
        print(f'\nDickey-Fuller Test: {column} with regression \'{regression}\'')
        print('ADF Statistic: %f' % result[0])
        print('p-value: %f' % result[1])
        print('Critical Values:')
        # result[4] maps '1%'/'5%'/'10%' to the critical values.
        for key, value in result[4].items():
            print('\t%s: %.3f' % (key, value))

        # Interpretation at the 5% level.
        # NOTE(review): the printed wording lumps 'ct'/'ctt'/'n' together as
        # "without drift"; strictly, 'ct'/'ctt' test trend-stationarity — the
        # messages are a simplification.
        if result[1] < 0.05:
            print(f"The time series '{column}' is stationary.")
            if regression == 'c':
                print("The time series is not a random walk with drift (reject H0).")
            else:
                print("The time series is not a random walk without drift (reject H0).")
        else:
            print(f"The time series '{column}' is not stationary.")
            if regression == 'c':
                print("The time series may be a random walk with drift (fail to reject H0).")
            else:
                print("The time series may be a random walk without drift (fail to reject H0).")
        print('\n')
Dickey-Fuller Test: drawdowns with regression 'c'
ADF Statistic: -3.086974
p-value: 0.027532
Critical Values:
	1%: -3.432
	5%: -2.862
	10%: -2.567
The time series 'drawdowns' is stationary.
The time series is not a random walk with drift (reject H0).



Dickey-Fuller Test: drawdowns with regression 'ct'
ADF Statistic: -3.240519
p-value: 0.076626
Critical Values:
	1%: -3.961
	5%: -3.412
	10%: -3.128
The time series 'drawdowns' is not stationary.
The time series may be a random walk without drift (fail to reject H0).



Dickey-Fuller Test: drawdowns with regression 'ctt'
ADF Statistic: -3.786029
p-value: 0.056509
Critical Values:
	1%: -4.374
	5%: -3.834
	10%: -3.554
The time series 'drawdowns' is not stationary.
The time series may be a random walk without drift (fail to reject H0).



Dickey-Fuller Test: drawdowns with regression 'n'
ADF Statistic: -1.159594
p-value: 0.224450
Critical Values:
	1%: -2.566
	5%: -1.941
	10%: -1.617
The time series 'drawdowns' is not stationary.
The time series may be a random walk without drift (fail to reject H0).



Dickey-Fuller Test: drawdowns MA with regression 'c'
ADF Statistic: -2.310093
p-value: 0.168733
Critical Values:
	1%: -3.432
	5%: -2.862
	10%: -2.567
The time series 'drawdowns MA' is not stationary.
The time series may be a random walk with drift (fail to reject H0).



Dickey-Fuller Test: drawdowns MA with regression 'ct'
ADF Statistic: -2.452802
p-value: 0.351947
Critical Values:
	1%: -3.961
	5%: -3.412
	10%: -3.128
The time series 'drawdowns MA' is not stationary.
The time series may be a random walk without drift (fail to reject H0).



Dickey-Fuller Test: drawdowns MA with regression 'ctt'
ADF Statistic: -2.879353
p-value: 0.355679
Critical Values:
	1%: -4.374
	5%: -3.834
	10%: -3.554
The time series 'drawdowns MA' is not stationary.
The time series may be a random walk without drift (fail to reject H0).



Dickey-Fuller Test: drawdowns MA with regression 'n'
ADF Statistic: -0.480404
p-value: 0.504161
Critical Values:
	1%: -2.566
	5%: -1.941
	10%: -1.617
The time series 'drawdowns MA' is not stationary.
The time series may be a random walk without drift (fail to reject H0).


The Dickey-Fuller tests indicate that for the 'drawdowns MA' series the null hypothesis of a unit root cannot be rejected at conventional significance levels in any specification. For the raw 'drawdowns' series the picture is mixed: the constant-only ('c') test rejects the null at the 5% level (p ≈ 0.028), while the trend ('ct', 'ctt') and no-constant ('n') specifications fail to reject.

ADF Statistic: This is the value that the test statistic takes; it differs across series and specifications (for the constant-only test it is -3.086974 for 'drawdowns' and -2.310093 for 'drawdowns MA'). The statistic needs to be more negative than the critical values to reject the null hypothesis with confidence.

p-value: The p-value indicates the probability of seeing the observed result if the null hypothesis were true. For 'drawdowns MA' with a constant it is 0.168733, above the common significance levels (0.05, 0.01, etc.), so that test provides no strong evidence against the null; for 'drawdowns' with a constant it is 0.027532, which is significant at the 5% level.

Critical Values: These values are the thresholds for the ADF statistic at different confidence levels. If the ADF statistic is less negative than these values, you fail to reject the null hypothesis. For 'drawdowns MA' the statistic is not more negative than any of the critical values; for 'drawdowns' the constant-only statistic (-3.087) does clear the 5% threshold (-2.862).

Based on the Dickey-Fuller test results:

For 'drawdowns MA' the test statistic is not negative enough compared to the critical values and the p-values are above 0.05, so there is insufficient evidence that this series is stationary — it may contain a unit root. For the raw 'drawdowns' series the constant-only test suggests stationarity at the 5% level, but the evidence is not robust across specifications. Both series should therefore be treated cautiously (e.g. differenced or otherwise transformed) before fitting models that assume stationarity.

Non-stationarity is an important concept in time series analysis because many forecasting models assume that the underlying data is stationary. If your data is non-stationary, you may need to transform it to make it stationary before using these models. Common transformations include differencing the data, taking the logarithm or another power transformation, or decomposing the series and analyzing the residuals.

In [360]:
import pandas as pd
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing

# Fit three exponential-smoothing variants to 'drawdowns' and store the
# in-sample fitted values as new columns. A fixed smoothing_level=0.2 is used
# throughout, which is why statsmodels emits the "Model has no free parameters"
# EstimationWarning shown below.

# Simple Exponential Smoothing (level only)
ses_model = SimpleExpSmoothing(df['drawdowns']).fit(smoothing_level=0.2)
df['SES'] = ses_model.fittedvalues

# Double Exponential Smoothing (level + trend).
# Trend must be declared additive ('add') or multiplicative ('mul');
# additive is assumed here.
des_model = ExponentialSmoothing(df['drawdowns'], trend='add').fit(smoothing_level=0.2)
df['DES'] = des_model.fittedvalues

# Triple Exponential Smoothing (level + trend + seasonality).
# seasonal_periods=12 assumes a 12-observation cycle; on (business-)daily data
# this has no obvious calendar meaning — NOTE(review): confirm the intended period.
# Only use Triple Exponential Smoothing if the data is actually seasonal.
tes_model = ExponentialSmoothing(df['drawdowns'], trend='add', seasonal='add', seasonal_periods=12).fit(smoothing_level=0.2)
df['TES'] = tes_model.fittedvalues

# Display the head of the DataFrame to see the fitted values
print(df.head())
        date  drawdowns  drawdowns MA  Local Min  SES           DES  \
0 2010-01-01        0.0           0.0          0  0.0 -2.920835e-07   
1 2010-01-04        0.0           0.0          0  0.0 -5.263949e-07   
2 2010-01-05        0.0           0.0          0  0.0 -7.133176e-07   
3 2010-01-06        0.0           0.0          0  0.0 -8.621424e-07   
4 2010-01-07        0.0           0.0          0  0.0 -9.803402e-07   

            TES  
0  5.655149e-07  
1  4.635638e-06  
2  2.736789e-06  
3 -1.684994e-06  
4 -4.295719e-06  
C:\Users\ZemingZhang\anaconda3\Lib\site-packages\pandas\util\_decorators.py:210: EstimationWarning:

Model has no free parameters to estimate. Set optimized=False to suppress this warning

In [361]:
# Columns to overlay: the raw series, the three smoothers, and the moving average.
columns_to_plot = ['drawdowns', 'SES', 'DES', 'TES', 'drawdowns MA']

# One line trace per column, built in a single comprehension.
traces = [
    go.Scatter(x=df.index, y=df[column], mode='lines', name=column)
    for column in columns_to_plot
]

# Assemble the figure with its layout inline and render offline.
fig = go.Figure(
    data=traces,
    layout=go.Layout(
        title='Drawdowns with Exponential Smoothing',
        xaxis=dict(title='Date'),
        yaxis=dict(title='Drawdowns'),
    ),
)

iplot(fig)
0500100015002000250030003500−300μ−250μ−200μ−150μ−100μ−50μ0
drawdownsSESDESTESdrawdowns MADrawdowns with Exponential SmoothingDateDrawdowns
plotly-logomark
In [362]:
# Thin wrapper around scipy's 1-D Gaussian smoother.
def gaussian_filter(series, std_dev=1):
    """Smooth `series` with a Gaussian kernel of standard deviation `std_dev`."""
    smoothed = gaussian_filter1d(series, std_dev)
    return smoothed

# Random-Cut-Forest-style anomaly suppression via IsolationForest.
def rcf_noise_reduction(series, contamination_factor='auto'):
    """Return a copy of `series` with IsolationForest-flagged anomalies replaced
    by the series mean.

    Parameters:
    - series: pd.Series, the input signal.
    - contamination_factor: float or 'auto', expected fraction of anomalies,
      forwarded to IsolationForest(contamination=...).
    """
    # Fit the model (fixed random_state for reproducible anomaly labels)
    iso_forest = IsolationForest(contamination=contamination_factor, random_state=42)
    anomalies = iso_forest.fit_predict(series.values.reshape(-1, 1))
    
    # Replace anomalies (labelled -1) by the MEAN of the data.
    # NOTE(review): the original comment said "median" but the code uses
    # series.mean() — confirm which statistic is intended.
    # This is a simplification and may not be suitable for all applications
    series_cleaned = series.copy()
    series_cleaned[anomalies == -1] = series.mean()
    
    return series_cleaned
In [363]:
# Apply the noise-reduction filters defined above.
std_dev = 2

# IsolationForest cleaning with an explicit 1% contamination assumption.
df['drawdowns_cleaned'] = rcf_noise_reduction(df['drawdowns'], contamination_factor=0.01)  # Adjust the contamination factor as needed
df['Gaussian'] = gaussian_filter(df['drawdowns'], std_dev)
# Default ('auto') contamination. Note: rcf_noise_reduction returns the CLEANED
# series (anomalies replaced by the mean), not anomaly scores.
df['RCF'] = rcf_noise_reduction(df['drawdowns'])

# Plotting using Plotly
fig = go.Figure()

# Original Data
fig.add_trace(go.Scatter(y=df['drawdowns'], mode='lines', name='Original'))

# RCF-cleaned data. The original legend said 'RCF Scores', but the trace shows
# the cleaned series, so it is labelled accordingly.
fig.add_trace(go.Scatter(y=df['RCF'], mode='lines', name='RCF Cleaned'))

# Gaussian Filtered Data
fig.add_trace(go.Scatter(y=df['Gaussian'], mode='lines', name='Gaussian'))

# Update layout
fig.update_layout(title='Drawdowns with Various Noise Reduction Techniques', xaxis_title='Time', yaxis_title='Drawdown Value')

# Show plot
fig.show()
0500100015002000250030003500−300μ−250μ−200μ−150μ−100μ−50μ0
OriginalRCF ScoresGaussianDrawdowns with Various Noise Reduction TechniquesTimeDrawdown Value
plotly-logomark
In [364]:
import pandas as pd
import numpy as np
from plotly import graph_objs as go
from statsmodels.tsa.seasonal import seasonal_decompose
import pywt

# Assuming 'df' is your DataFrame and 'drawdowns' is the column with data.

# Classical additive decomposition with a 6-observation period.
# NOTE(review): period=6 on daily data is an assumption — confirm the intended
# cycle length.
result = seasonal_decompose(df['drawdowns'], model='additive', period=6)
df['trend'] = result.trend
df['seasonal'] = result.seasonal
df['resid'] = result.resid

# Wavelet transform for noise reduction: soft-threshold the detail coefficients.
coeffs = pywt.wavedec(df['drawdowns'], 'db1', level=1)
coeffs[1:] = [pywt.threshold(i, value=0.5, mode='soft') for i in coeffs[1:]]
# pywt.waverec can return one extra sample when the input length is odd
# (df has 3561 rows), which would misalign the trace against df.index —
# truncate the reconstruction to the original length.
reconstructed_series = pywt.waverec(coeffs, 'db1')[:len(df)]

# Plotting with Plotly
fig = go.Figure()

# Original series
fig.add_trace(go.Scatter(x=df.index, y=df['drawdowns'], mode='lines', name='Original Series'))

# Decomposition components
fig.add_trace(go.Scatter(x=df.index, y=df['trend'], mode='lines', name='Trend'))
fig.add_trace(go.Scatter(x=df.index, y=df['seasonal'], mode='lines', name='Seasonality'))
fig.add_trace(go.Scatter(x=df.index, y=df['resid'], mode='lines', name='Residuals (Noise)'))

# Wavelet reconstructed series
fig.add_trace(go.Scatter(x=df.index, y=reconstructed_series, mode='lines', name='Wavelet Reconstructed Series'))

# Update layout
fig.update_layout(title='Time Series Decomposition and Wavelet Noise Reduction',
                  xaxis_title='Date',
                  yaxis_title='Drawdown Value')

# Show plot
fig.show()
0500100015002000250030003500−300μ−250μ−200μ−150μ−100μ−50μ050μ
Original SeriesTrendSeasonalityResiduals (Noise)Wavelet Reconstructed SeriesTime Series Decomposition and Wavelet Noise ReductionDateDrawdown Value
plotly-logomark
In [365]:
import pandas as pd

from scipy.signal import savgol_filter

# Smooth the drawdowns with a Savitzky-Golay filter.
# (The original cell first wrote a centered rolling MINIMUM into df['smoothed'],
# mislabelled as a "moving average", and then immediately overwrote it with the
# Savitzky-Golay result below — that dead computation has been removed.)
#
# window_length must be odd and smaller than the series length;
# polyorder is the order of the polynomial fitted inside each window.
df['smoothed'] = savgol_filter(df['drawdowns'], window_length=5, polyorder=2)

import plotly.graph_objs as go
from plotly.offline import iplot

# Plot the original series against its smoothed version.

# Create traces for the original and smoothed data
trace_original = go.Scatter(
    x=df.index,
    y=df['drawdowns'],
    mode='lines',
    name='Original Data'
)

trace_smoothed = go.Scatter(
    x=df.index,
    y=df['smoothed'],
    mode='lines',
    name='Smoothed Data'
)

# Combine the traces
data = [trace_original, trace_smoothed]

# Define the layout of the plot
layout = go.Layout(
    title='Original vs Smoothed Data',
    xaxis=dict(title='Index'),
    yaxis=dict(title='Values'),
    hovermode='closest'
)

# Create the figure with the data and layout
fig = go.Figure(data=data, layout=layout)

# Show the plot
iplot(fig)
0500100015002000250030003500−300μ−250μ−200μ−150μ−100μ−50μ0
Original DataSmoothed DataOriginal vs Smoothed DataIndexValues
plotly-logomark
In [366]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt

# NOTE: the original cell defined this function twice with identical bodies
# (the second definition silently shadowed the first); it is now defined once,
# keeping the order=5 default from the surviving definition.
def butter_lowpass_filter(data, cutoff, fs, order=5):
    """
    Apply a zero-phase Butterworth low-pass filter to the data.

    Parameters:
    - data: The original data as a 1D numpy array or list.
    - cutoff: The cutoff frequency of the filter (in the same units as fs).
    - fs: The sampling rate of the data.
    - order: The order of the filter (an integer, default 5).

    Returns:
    - y: The filtered data as a numpy array.
    """
    nyq = 0.5 * fs  # Nyquist Frequency
    normal_cutoff = cutoff / nyq
    # Get the filter coefficients
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    # filtfilt runs the filter forward and backward, so no phase lag is introduced.
    y = filtfilt(b, a, data)
    return y

# Assuming daily data, the sampling rate `fs` is 1 day^-1; the cutoff frequency
# is arbitrarily chosen for demonstration purposes. In practice, choose the
# cutoff from the spectral characteristics of your data.
fs = 1  # one sample per day
cutoff = 0.1  # Cutoff frequency chosen for demonstration purposes

# Apply the filter to the Drawdown column
filtered_drawdowns = butter_lowpass_filter(df['drawdowns'], cutoff, fs)

# Add the filtered data to the DataFrame
df['Filtered_Drawdown'] = filtered_drawdowns

# Plot the original and filtered data
plt.figure(figsize=(14, 7))
plt.plot(df['date'], df['drawdowns'], label='Original Drawdown')
plt.plot(df['date'], df['Filtered_Drawdown'], label='Filtered Drawdown', color='red')
plt.xlabel('date')
plt.ylabel('Drawdown')
plt.title('Original vs Filtered Drawdown')
plt.legend()
plt.show()
In [367]:
import plotly.graph_objs as go

# Select local minima that precede an upward move, keeping only minima that are
# strictly deeper than the previously selected one.
def find_selected_minima(data):
    """Return indices of progressively deeper local minima.

    A local minimum is selected when the series rises after it AND its value is
    lower than the value at the last selected minimum (the first qualifying
    minimum is always selected).

    Parameters:
    - data: 1-D sequence supporting integer indexing (np.ndarray, or pd.Series
      with a 0..n-1 integer index).

    Returns:
    - list of integer positions of the selected minima.
    """
    # Positions where the first difference changes sign from negative to
    # positive, i.e. local minima of the series.
    local_min_indices = (np.diff(np.sign(np.diff(data))) > 0).nonzero()[0] + 1
    selected_minima = []
    # (Removed the unused `up_trend` flag from the original implementation.)
    last_minima = data[local_min_indices[0]]
    last_minima_idx = local_min_indices[0]

    for i in local_min_indices[1:]:
        # The series rose between the previous local minimum and this one.
        if data[i] > last_minima:
            if not selected_minima:
                selected_minima.append(last_minima_idx)
            elif last_minima < data[selected_minima[-1]]:
                selected_minima.append(last_minima_idx)
        last_minima_idx = i
        last_minima = data[i]

    return selected_minima




# Locate the progressively deeper minima on the raw drawdown series.
selected_minima_indices = find_selected_minima(df['drawdowns'])

print(selected_minima_indices)

# Overlay the selected minima (green markers) on the original drawdown curve.
line_trace = go.Scatter(
    x=df['date'],
    y=df['drawdowns'],
    mode='lines',
    name='Original Drawdown',
)
minima_trace = go.Scatter(
    x=df['date'][selected_minima_indices],
    y=df['drawdowns'][selected_minima_indices],
    mode='markers',
    name='Selected Minima',
    marker=dict(color='green', size=8),
)

fig = go.Figure(data=[line_trace, minima_trace])
fig.update_layout(title='Original vs Filtered Drawdown with Selected Minima',
                  xaxis_title='Date',
                  yaxis_title='Drawdown')
fig.show()
[12, 22, 86, 268, 464, 576, 869, 877, 3313, 3357, 3424]
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
Original DrawdownSelected MinimaOriginal vs Filtered Drawdown with Selected MinimaDateDrawdown
plotly-logomark
In [368]:
import pandas as pd
import numpy as np
import ruptures as rpt

# Change point detection
# Using Binary Segmentation algorithm
model = "l2"  # "l2" cost function for detecting mean shift, other options are "l1", "rbf", etc.
algo = rpt.Binseg(model=model).fit(df['drawdowns'].values)
result = algo.predict(n_bkps=10)  # request 10 breakpoints (the original comment incorrectly said 1)

# 'result' holds the end index of each detected segment; the last entry is len(series).
result
Out[368]:
[705, 855, 990, 1240, 1430, 1665, 2970, 3205, 3325, 3400, 3561]
In [369]:
from scipy.signal import find_peaks

data = df['drawdowns']
# CUSUM-style heuristic: cumulative sum of the mean-centred series. Slope
# changes in this curve flag shifts in the local mean of the drawdowns.
cumsum = np.cumsum(data - np.mean(data))

# Local maxima of the CUSUM curve serve as candidate change points.
# (The original cell also computed troughs of -cumsum here but never used them,
# and its comment claimed peaks and troughs were combined — they were not.)
peaks, _ = find_peaks(cumsum)
change_points = peaks

# Plot the data with lines indicating the change points
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['date'], data, label='Drawdowns')

# Plotting the change points
for cp in change_points:
    ax.axvline(x=df['date'][cp], color='r', linestyle='--', linewidth=0.5)

ax.set_xlabel('Date')
ax.set_ylabel('Drawdowns')
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [370]:
# CUSUM heuristic: cumulative sum of the mean-centred drawdowns.
cumsum = np.cumsum(df['drawdowns'] - np.mean(df['drawdowns']))

# Find local minima in the cumulative sum (which are peaks in the negative cumulative sum)
troughs, _ = find_peaks(-cumsum)

# Plot the data with dashed lines indicating the detected local minima.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['date'], df['drawdowns'], label='Drawdowns')

# Label only the first vline so the legend shows a single 'Detected Local Minima' entry.
for trough in troughs:
    ax.axvline(x=df['date'][trough], color='r', linestyle='--', linewidth=0.5, label='Detected Local Minima' if trough == troughs[0] else "")

ax.set_xlabel('Date')
ax.set_ylabel('Drawdowns')
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [371]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ruptures as rpt  # make sure to install ruptures: pip install ruptures

# Assuming df is your DataFrame and it has already been created with 'date' and 'drawdowns' columns

# Change point detection with Binary Segmentation
model = "rbf"  # "rbf" kernel cost (the stale comment previously described the "l2"/least-squares model)
algo = rpt.Binseg(model=model).fit(df['drawdowns'].values)
my_bkps = algo.predict(n_bkps=4)  # the number of breakpoints to detect

# Plot the results
rpt.display(df['drawdowns'].values, my_bkps, figsize=(10, 5))
plt.show()
In [372]:
# CUSUM of the mean-centred MOVING-AVERAGE series this time.
cumsum = np.cumsum(df['drawdowns MA'] - np.mean(df['drawdowns MA']))

# Troughs of the MA-based CUSUM curve (peaks of its negation).
troughs, _ = find_peaks(-cumsum)

# NOTE(review): the minima are computed from 'drawdowns MA' but drawn over the
# RAW 'drawdowns' curve — confirm this mixing is intentional.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['date'], df['drawdowns'], label='Drawdowns')

# Label only the first vline to avoid duplicate legend entries.
for trough in troughs:
    ax.axvline(x=df['date'][trough], color='r', linestyle='--', linewidth=0.5, label='Detected Local Minima' if trough == troughs[0] else "")

ax.set_xlabel('Date')
ax.set_ylabel('Drawdowns')
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Display the trough indices found above.
troughs
Out[372]:
array([ 634, 1039, 1180, 1708, 2821, 3129], dtype=int64)
In [373]:
import plotly.graph_objs as go

# NOTE(review): this cell re-defines find_selected_minima essentially
# identically to the earlier cell (In [367]) — consider deleting one copy.
def find_selected_minima(data):
    """Return indices of successively deeper local minima (see earlier cell)."""
    # Indices where the slope flips from negative to positive (local minima).
    minima = (np.diff(np.sign(np.diff(data))) > 0).nonzero()[0] + 1

    selected = []
    prev_idx, prev_val = minima[0], data[minima[0]]
    for idx in minima[1:]:
        # The series rose after the previous local minimum; select it if it is
        # the first candidate or deeper than the last selected minimum.
        if data[idx] > prev_val:
            if not selected or prev_val < data[selected[-1]]:
                selected.append(prev_idx)
        prev_idx, prev_val = idx, data[idx]

    return selected




# NOTE(review): this cell is a copy-paste duplicate of In [367] — it recomputes
# and re-plots the same selected minima; consider removing one of the two cells.
selected_minima_indices = find_selected_minima(df['drawdowns'])

print(selected_minima_indices)

# Overlay the selected minima on the original drawdown curve.
fig = go.Figure()

# Original Drawdowns
fig.add_trace(go.Scatter(x=df['date'], y=df['drawdowns'],
                         mode='lines', name='Original Drawdown'))

# Selected Minima
fig.add_trace(go.Scatter(x=df['date'][selected_minima_indices], 
                         y=df['drawdowns'][selected_minima_indices],
                         mode='markers', name='Selected Minima',
                         marker=dict(color='green', size=8)))

fig.update_layout(title='Original vs Filtered Drawdown with Selected Minima',
                  xaxis_title='Date',
                  yaxis_title='Drawdown')
fig.show()
[12, 22, 86, 268, 464, 576, 869, 877, 3313, 3357, 3424]
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
Original DrawdownSelected MinimaOriginal vs Filtered Drawdown with Selected MinimaDateDrawdown
plotly-logomark
In [374]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS
import plotly.graph_objs as go

# Define the rolling window size
window_size = 30  # for example, a 30-day rolling window
df1 = df.copy(deep=True)
df1.set_index('date', inplace=True)

# Add a constant term for the regression intercept
df1['const'] = 1

# Regress drawdowns on a constant ONLY: the fitted 'const' coefficient in each
# window is therefore simply the rolling mean of the drawdowns over that window.
rolling_model = RollingOLS(endog=df1['drawdowns'], exog=df1[['const']], window=window_size)

# Fit the model
rolling_fit = rolling_model.fit()

# Create the line chart for the original 'drawdowns' data
trace_original = go.Scatter(
    x=df1.index,
    y=df1['drawdowns'],
    mode='lines',
    name='Drawdowns'
)

# Rolling intercept trace. The original legend called this a "slope", but with
# a constant-only design there is no slope regressor — it is the rolling mean.
trace_rolling_mean = go.Scatter(
    x=df1.index[window_size-1:],  # The first 'window_size-1' estimates are NaN
    y=rolling_fit.params['const'][window_size-1:],
    mode='lines',
    name=f'Rolling Mean ({window_size}-day window)'
)

# Combine the plots
data = [trace_original, trace_rolling_mean]

# Define the layout for the plot
layout = go.Layout(
    title='Rolling Regression with Plotly',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Drawdowns')
)

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Plot the figure
fig.show()
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
DrawdownsRolling Slope (30-day window)Rolling Regression with PlotlyDateDrawdowns
plotly-logomark
In [375]:
# Inspect the rolling coefficient estimates. With the constant-only design of
# the previous cell, the 'const' column is the rolling mean of 'drawdowns'
# (NaN for the first window_size-1 rows).
coefficients = rolling_fit.params
coefficients
Out[375]:
const
date
2010-01-01 NaN
2010-01-04 NaN
2010-01-05 NaN
2010-01-06 NaN
2010-01-07 NaN
... ...
2023-08-21 -0.000302
2023-08-22 -0.000302
2023-08-23 -0.000302
2023-08-24 -0.000301
2023-08-25 -0.000301

3561 rows × 1 columns

In [376]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Integrate the area between the drawdown curve and y=0 with the trapezoid rule.
# Dates are cast to Unix epoch seconds, so the result's units are drawdown x seconds.
# Since all values are <= 0, integrating the negated series yields a positive area.
# NOTE(review): np.trapz is deprecated/removed in NumPy >= 2.0 — prefer
# np.trapezoid (or scipy.integrate.trapezoid) when upgrading.
area_above_graph = np.trapz(-df['drawdowns'], df['date'].astype(np.int64) // 10**9)
area_above_graph
Out[376]:
39201.68171300796
In [377]:
import plotly.graph_objects as go

# The DataFrame and area calculation are defined in previous cells;
# this cell only plots the curve with a zero reference line.

# Create the Plotly figure
fig = go.Figure()

# Add the drawdowns line
fig.add_trace(go.Scatter(
    x=df['date'],
    y=df['drawdowns'],
    mode='lines',
    name='Drawdowns',
    line_color='blue',
))

# Update layout to add titles and adjust the legend.
# The reference shape spans the full date range at y=0, i.e. it is a HORIZONTAL
# line (the original comment and title called it a vertical line at x=0).
fig.update_layout(
    title='Area Above the Drawdowns Graph and Below Zero with Horizontal Line at Y=0',
    xaxis_title='Date',
    yaxis_title='Drawdowns',
    legend_title='Legend',
    shapes=[
        # Horizontal reference line at y = 0
        go.layout.Shape(
            type="line",
            x0=df['date'].iloc[0],
            y0=0,
            x1=df['date'].iloc[-1],
            y1=0,
            line=dict(
                color="Black",
                width=2,
                dash="dashdot",
            )
        )],
)

# Show the figure
fig.show()
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
Area Above the Drawdowns Graph and Below Zero with Vertical Line at X=0DateDrawdowns
plotly-logomark
In [378]:
import plotly.graph_objects as go

# NOTE(review): this cell reads df1, window_size and rolling_fit, which are
# only created in a LATER cell (In [379]) — it runs correctly only thanks to
# out-of-order execution and will NameError on Restart & Run All.

# Assuming 'troughs' is a list of indices where the drawdowns hit 0.
# For the sake of demonstration, let's create a synthetic 'troughs' array.
troughs = df['drawdowns'][df['drawdowns'] >= 0].index

# Create the Plotly figure
fig = go.Figure()

# Add the drawdowns line
fig.add_trace(go.Scatter(x=df['date'], y=df['drawdowns'], mode='lines', name='Drawdowns'))

# Add vertical lines at each point where the drawdowns line crosses zero
for trough in troughs:
    fig.add_shape(
        type='line',
        x0=df['date'][trough],
        y0=df['drawdowns'].min(),
        x1=df['date'][trough],
        y1=0,
        line=dict(color='Red', width=2, dash='dash'),
        name='Zero Crossing'
    )

# Add the area fill below zero for drawdowns.
# NOTE(review): this re-adds the drawdowns trace under the same legend name
# 'Drawdowns'; the white 'tozeroy' fill presumably serves to mask the red
# vertical lines between the curve and zero — confirm that is intentional.
fig.add_trace(go.Scatter(
    x=df['date'],
    y=df['drawdowns'],
    fill='tozeroy',  # fill area between trace0 and y=0
    fillcolor='white',
    mode='lines',
    name='Drawdowns',
    line_color='blue',
))

# Create the line chart for the rolling regression coefficients (slope of the regression line)
# NOTE(review): 'const' comes from a constant-only RollingOLS (In [379]),
# i.e. it is a rolling mean — the 'Rolling Slope' label overstates what is
# plotted here.
fig.add_trace(go.Scatter(
    x=df1.index[window_size-1:],  # The first 'window_size-1' values will be NaN
    y=rolling_fit.params['const'][window_size-1:],
    mode='lines',
    name=f'Rolling Slope ({window_size}-day window)'
))

# Add the zero line
fig.add_shape(
    type='line',
    x0=df['date'].min(),
    y0=0,
    x1=df['date'].max(),
    y1=0,
    line=dict(color='Black', width=1, dash='dash'),
)

# Update the layout to add titles and adjust the legend
fig.update_layout(
    title='Drawdowns with Vertical Lines at Zero Crossings',
    xaxis_title='Date',
    yaxis_title='Drawdowns',
    showlegend=False
)

# Show the figure
fig.show()
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
Drawdowns with Vertical Lines at Zero CrossingsDateDrawdowns
plotly-logomark
In [379]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS
import plotly.graph_objs as go

# NOTE(review): this cell defines df1 / window_size / rolling_fit, which
# earlier-positioned cells (In [375], In [378]) read — the notebook relies on
# out-of-order execution. Consider moving this cell above its consumers.

# Define the rolling window size
window_size = 30  # for example, a 30-day rolling window
df1 = df.copy(deep=True)
df1.set_index('date', inplace=True)

# Add a constant term for the regression intercept
df1['const'] = 1

# Define the model.
# NOTE(review): the only regressor is the constant term, so the fitted
# params['const'] is the rolling MEAN of 'drawdowns', not a slope; the
# 'Rolling Slope' legend label below is therefore misleading. A genuine
# slope would require adding a time-trend column to exog.
rolling_model = RollingOLS(endog=df1['drawdowns'], exog=df1[['const']], window=window_size)

# Fit the model
rolling_fit = rolling_model.fit()

# Create the line chart for the original 'drawdowns' data
trace_original = go.Scatter(
    x=df1.index,
    y=df1['drawdowns'],
    mode='lines',
    name='Drawdowns'
)

# Create the line chart for the rolling regression coefficients (slope of the regression line)
trace_rolling_slope = go.Scatter(
    x=df1.index[window_size-1:],  # The first 'window_size-1' values will be NaN
    y=rolling_fit.params['const'][window_size-1:],
    mode='lines',
    name=f'Rolling Slope ({window_size}-day window)'
)

# Combine the plots
data = [trace_original, trace_rolling_slope]

# Define the layout for the plot
layout = go.Layout(
    title='Rolling Regression with Plotly',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Drawdowns')
)

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Plot the figure
fig.show()
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
DrawdownsRolling Slope (30-day window)Rolling Regression with PlotlyDateDrawdowns
plotly-logomark
In [380]:
import pandas as pd
import numpy as np
from plotly import graph_objs as go
from statsmodels.tsa.seasonal import seasonal_decompose
import pywt

# Define the rolling window size
window_size = 30  # for example, a 30-day rolling window
df1 = df.copy(deep=True)
df1.set_index('date', inplace=True)

# Seasonal Decomposition (period=12 observations)
result = seasonal_decompose(df1['drawdowns'], model='additive', period=12)
df1['trend'] = result.trend

# Add a constant term for the regression intercept
df1['const'] = 1

# Define the model (constant-only exog: params['const'] is the rolling mean
# of the trend; relies on RollingOLS imported in an earlier cell)
rolling_model = RollingOLS(endog=df1['trend'], exog=df1[['const']], window=window_size)

# Fit the model
rolling_fit = rolling_model.fit()

# Plotting with Plotly
fig = go.Figure()

# Original series.
# Fix: read from df1 — the x-axis uses df1's date index, so y must come from
# the same frame.
fig.add_trace(go.Scatter(x=df1.index, y=df1['drawdowns'], mode='lines', name='Original Series'))

# Decomposition components.
# Fix: was df['trend'], but 'trend' is only ever added to df1 in this cell —
# df['trend'] raises KeyError on a fresh kernel.
fig.add_trace(go.Scatter(x=df1.index, y=df1['trend'], mode='lines', name='Trend'))

# Create the line chart for the rolling regression coefficients (slope of the regression line)
fig.add_trace(go.Scatter(
    x=df1.index[window_size-1:],  # The first 'window_size-1' values will be NaN
    y=rolling_fit.params['const'][window_size-1:],
    mode='lines',
    name=f'Rolling Slope ({window_size}-day window)'
))

# Update layout
fig.update_layout(title='Time Series Decomposition With rolling',
                  xaxis_title='Date',
                  yaxis_title='Drawdown Value')

# Show plot
fig.show()
2010201220142016201820202022−300μ−250μ−200μ−150μ−100μ−50μ0
Original SeriesTrendRolling Slope (30-day window)Time Series Decomposition With rollingDateDrawdown Value
plotly-logomark
In [381]:
# Replace 'path_to_file.csv' with the actual path to your CSV file.
# NOTE(review): this reload discards the earlier cleaning — the trailing rows
# with NaN dates seen in the raw file come back, and the later cells plot
# against the default integer index rather than dates.
df = pd.read_csv('test.csv')
In [382]:
import pandas as pd
import numpy as np
from plotly import graph_objs as go
from statsmodels.regression.rolling import RollingOLS
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.signal import find_peaks

# Assuming 'df' is your DataFrame with an index of dates and a 'drawdowns' column
# Also assuming df is sorted by the date index

# Perform seasonal decomposition
result = seasonal_decompose(df['drawdowns'], model='additive', period=12)
# The trend is NaN at both ends of the series; back-fill then forward-fill.
# Fix: fillna(method=...) is deprecated (pandas >= 2.1); .bfill()/.ffill()
# are the long-standing, behaviorally identical equivalents.
df['trend'] = result.trend.bfill().ffill()

# Define the rolling window size
window_size = 30  # for example, a 30-day rolling window

# Add a constant term for the regression intercept
df['const'] = 1

# Define the model (constant-only exog: params['const'] is the rolling mean
# of the trend, not a slope)
rolling_model = RollingOLS(endog=df['trend'], exog=df[['const']], window=window_size)

# Fit the model
rolling_fit = rolling_model.fit()

# Shift the rolling coefficients back by half a window to align with the
# original series
shifted_params = rolling_fit.params.shift(-window_size // 2, fill_value=np.nan)

# Calculate the cumulative sum of the de-meaned shifted parameters
cumsum_shifted_params = np.cumsum(shifted_params['const'] - np.mean(shifted_params['const']))

# Find local minima in the cumulative sum (peaks of the negated series)
troughs, _ = find_peaks(-cumsum_shifted_params)

# Create the Plotly figure
fig = go.Figure()

# Add the original drawdowns series
fig.add_trace(go.Scatter(x=df.index, y=df['drawdowns'], mode='lines', name='Original Series'))

# Add the trend component from the seasonal decomposition
fig.add_trace(go.Scatter(x=df.index, y=df['trend'], mode='lines', name='Trend'))

# Add the shifted rolling regression slope
fig.add_trace(go.Scatter(
    x=df.index,
    y=shifted_params['const'],
    mode='lines',
    name='Shifted Rolling Slope'
))

# Add vertical lines at each trough
for trough in troughs:
    fig.add_vline(
        x=df.index[trough],
        line=dict(color='Red', width=2, dash='dash'),
        annotation_text="Local Minima",
        annotation_position="bottom right"
    )

# Update layout
fig.update_layout(
    title='Time Series Decomposition with Rolling Regression Slope and Troughs',
    xaxis_title='Date',
    yaxis_title='Drawdown Value'
)

# Show plot
fig.show()
0500100015002000250030003500−300μ−250μ−200μ−150μ−100μ−50μ0
Original SeriesTrendShifted Rolling SlopeTime Series Decomposition with Rolling Regression Slope and TroughsDateDrawdown ValueLocal MinimaLocal MinimaLocal MinimaLocal MinimaLocal MinimaLocal MinimaLocal MinimaLocal MinimaLocal Minima
plotly-logomark
In [383]:
import pandas as pd
import numpy as np
from plotly import graph_objs as go
from statsmodels.regression.rolling import RollingOLS
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.signal import find_peaks
# Fix: the import block was duplicated verbatim; deduplicated and grouped.

# Define the rolling window size
window_size = 30  # for example, a 30-day rolling window

# Add a constant term for the regression intercept
df['const'] = 1

# Define the rolling regression model (constant-only exog: params['const']
# is the rolling mean of the drawdowns)
rolling_model = RollingOLS(endog=df['drawdowns'], exog=df[['const']], window=window_size)

# Fit the rolling regression model
rolling_fit = rolling_model.fit()

# Shift the rolling coefficients back by half a window to align with the
# original series
shifted_params = rolling_fit.params.shift(-window_size // 2, fill_value=np.nan)

# Extract the rolling intercept
df['intercept'] = shifted_params['const']
# Perform seasonal decomposition on the intercept
result = seasonal_decompose(df['intercept'].dropna(), model='additive', period=12)  # Drop NA for decomposition

# Since the decomposition outputs NaNs for both ends, we'll fill NaNs for plotting purposes.
# Fix: fillna(method='bfill', inplace=True) is deprecated (pandas >= 2.1);
# assigning the result of .bfill() is the warning-free, identical equivalent.
df['trend'] = result.trend
df['trend'] = df['trend'].bfill()
# Calculate the cumulative sum of the de-meaned trend
cumsum_shifted_params = np.cumsum(df['trend'] - np.mean(df['trend']))

# Find local minima in the cumulative sum (peaks of the negated series)
troughs, _ = find_peaks(-cumsum_shifted_params.dropna())

# Add the first and last indices to the troughs array
troughs = np.insert(troughs, 0, 0)  # Insert the start index
troughs = np.append(troughs, len(df) - 1)  # Append the last index

troughs
Out[383]:
array([   0,  266,  607, 1001, 1163, 1666, 2786, 3015, 3098, 3576],
      dtype=int64)
In [384]:
# Build the final figure: raw series, rolling intercept, decomposition trend,
# plus a per-segment OLS fit between consecutive trough indices.
fig = go.Figure()

# Raw drawdowns series
fig.add_trace(go.Scatter(x=df.index, y=df['drawdowns'], mode='lines', name='Original Series'))

# Shifted rolling-regression intercept
fig.add_trace(go.Scatter(x=df.index, y=df['intercept'], mode='lines', name='Rolling Intercept'))

# Trend component from the seasonal decomposition of the intercept
fig.add_trace(go.Scatter(x=df.index, y=df['trend'], mode='lines', name='Trend'))

# For each consecutive pair of trough indices, fit a straight line (OLS) to
# that stretch of the drawdowns, draw it, and mark the segment start.
for seg_no, (lo, hi) in enumerate(zip(troughs[:-1], troughs[1:]), start=1):
    # Segment of the original 'drawdowns' series (inclusive of both ends)
    segment_vals = df['drawdowns'].iloc[lo:hi + 1].values

    # Independent variable: simple time-step range over the segment
    t = np.arange(len(segment_vals)).reshape(-1, 1)

    # Ordinary least squares: drawdowns ~ intercept + time step
    fitted = OLS(segment_vals, sm.add_constant(t)).fit()
    intercept_hat = fitted.params[0]
    slope_hat = fitted.params[1]

    # Fitted regression line over the segment
    fitted_line = intercept_hat + slope_hat * t.flatten()

    fig.add_trace(go.Scatter(
        x=df.index[lo:hi + 1],
        y=fitted_line,
        mode='lines',
        name=f'Slope {seg_no}'
    ))

    # Dashed red marker at the segment start (a trough)
    fig.add_vline(
        x=df.index[lo],
        line=dict(color='Red', width=2, dash='dash'),
        annotation_text="Trough",
        annotation_position="bottom right"
    )

# Titles and axis labels
fig.update_layout(
    title='Rolling Regression Intercept followed by Seasonal Decomposition',
    xaxis_title='Date',
    yaxis_title='Value'
)

# Render the figure
fig.show()
0500100015002000250030003500−300μ−250μ−200μ−150μ−100μ−50μ0
Original SeriesRolling InterceptTrendSlope 1Slope 2Slope 3Slope 4Slope 5Slope 6Slope 7Slope 8Slope 9Rolling Regression Intercept followed by Seasonal DecompositionDateValueTroughTroughTroughTroughTroughTroughTroughTroughTrough
plotly-logomark
In [386]:
!jupyter nbconvert noise_reduction.ipynb --to html
[NbConvertApp] Converting notebook noise_reduction.ipynb to html
[NbConvertApp] Writing 5196766 bytes to noise_reduction.html